rm(list=ls())

# see ANALYSIS_README for info on this file

pacman::p_load(rdrobust, tidyverse, readxl, rddtools, lubridate,zipcodeR)

####
# Anley flag sales ----
####

# read in data
dat <- read_csv('flagdata/4x6flag.csv')

# create running variable
dat$day_running <- as.numeric((ymd('2020-05-28') - dat$date)*-1)

returnPlotData <- function(full_plot, yaxis, title){
  plot <- full_plot$vars_bins %>%
    na.omit() 
  return(plot)
}

# FULL SAMPLE
full_plot1 <- rdplot(dat$n,
                    dat$day_running,
                    p=1, kernel = 'triangular')
p1 <- returnPlotData(full_plot1)

# effect size
house_rdd <- rdd_data(y=n, x=day_running, cutpoint=0, data=dat)
bw_ik <- rdd_bw_ik(house_rdd,kernel = 'Triangular')
reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)

# table B5 column 1
print(reg_nonpara)

# size of effect
reg_nonpara$coefficients / sd(dat$n, na.rm=T)

#####
# Twitter data
#####

dat <- read_csv('twitterdata/_tweets_by_day_aggregated.csv')

# plot
dat$day_running <- as.numeric((ymd('2020-05-28') - dat$date)*-1)
full_plot2 <- rdplot(dat$n,dat$day_running,
                    p=1,kernel = 'triangular')
p2 <- returnPlotData(full_plot2) 

# effect
house_rdd <- rdd_data(y=dat$n, x=dat$day_running, cutpoint=0)
bw_ik <- rdd_bw_ik(house_rdd)
reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)

# Table b5 column 2
print(reg_nonpara)

# magnitude
reg_nonpara$coefficients/sd(dat$n)

######
# media sources ----
######

df <- read_csv('mediadata/bluelm_media_coverage.csv')
df$date <- mdy(df$date)
df$day_running <- as.numeric((ymd('2020-05-28') - df$date)*-1)

# plot
full_plot3 <- rdplot(df$blm_count,df$day_running,
                    p=1,kernel = 'triangular')
p3 <- returnPlotData(full_plot3)

# effect
house_rdd <- rdd_data(y=df$blm_count, x=df$day_running, cutpoint=0)
bw_ik <- rdd_bw_ik(house_rdd)
reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
print(reg_nonpara)

# magnitude
reg_nonpara$coefficients/sd(df$blm_count,na.rm=T)


####
# Google trends ----
####

df <- read_csv('googledata/scaled_google_search.csv')
df$date <- ymd(df$date)
df$day_running <- as.numeric((ymd('2020-05-28') - df$date)*-1)

# plot
full_plot4 <- rdplot(df$hits,df$day_running,
                    p=1,kernel = 'triangular')
p4 <- returnPlotData(full_plot4)

# effect
house_rdd <- rdd_data(y=df$hits, x=df$day_running, cutpoint=0)
reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
print(reg_nonpara)

# magnitude
reg_nonpara$coefficients/sd(df$hits,na.rm=T)

# combine

p1 <- p1 %>% mutate(z = 'Flag Sales')
p2 <- p2 %>% mutate(z = 'Tweets')
p3 <- p3 %>% mutate(z = 'Media Coverage')
p4 <- p4 %>% mutate(z = 'Google Trends')

full_plot1$vars_poly$z <- 'Flag Sales'
full_plot2$vars_poly$z <- 'Tweets'
full_plot3$vars_poly$z <- 'Media Coverage'
full_plot4$vars_poly$z <- 'Google Trends'
full_plot_dat <- bind_rows(full_plot1$vars_poly, full_plot2$vars_poly, full_plot3$vars_poly, full_plot4$vars_poly) %>%
  na.omit() %>%
  filter(rdplot_y > 0) %>%
  mutate(z=factor(z, levels=c(
    'Google Trends','Media Coverage','Tweets','Flag Sales'
  )))

floyd_plots <- bind_rows(p1, p2, p3, p4) %>%
  mutate(z = factor(z, levels=c(
    'Google Trends','Media Coverage','Tweets','Flag Sales'
  ))) %>%
  na.omit() %>%
  ggplot(aes(rdplot_mean_x, rdplot_mean_y)) +
  geom_point(size=1, alpha=0.5) +
  geom_vline(xintercept=0, linetype=2) +
  theme_bw() +
  labs(x='Days',y='',title='') +
  theme(plot.title = element_text(hjust=0.5),
        axis.title.y = element_text(margin = margin(t = 0, r = 5, b = 0, l = 0))) +
  facet_wrap(~z, scales='free', ncol=4, drop=T) +
  labs(title='2020 BLM Protests') +

  
  geom_line(data=full_plot_dat[full_plot_dat$rdplot_x<0 & full_plot_dat$z == 'Tweets',],
            aes(rdplot_x, rdplot_y),
            color='red', size=0.5, inherit.aes = F) +
  geom_line(data=full_plot_dat[full_plot_dat$rdplot_x>0 & full_plot_dat$z == 'Tweets',],
            aes(rdplot_x, rdplot_y),
            color='red', size=0.5, inherit.aes = F) +
  
  geom_line(data=full_plot_dat[full_plot_dat$rdplot_x<0 & full_plot_dat$z == 'Media Coverage',],
            aes(rdplot_x, rdplot_y),
            color='red', size=0.5, inherit.aes = F) +
  geom_line(data=full_plot_dat[full_plot_dat$rdplot_x>0 & full_plot_dat$z == 'Media Coverage',],
            aes(rdplot_x, rdplot_y),
            color='red', size=0.5, inherit.aes = F) +
  
  geom_line(data=full_plot_dat[full_plot_dat$rdplot_x<0 & full_plot_dat$z == 'Google Trends',],
            aes(rdplot_x, rdplot_y),
            color='red', size=0.5, inherit.aes = F) +
  geom_line(data=full_plot_dat[full_plot_dat$rdplot_x>0 & full_plot_dat$z == 'Google Trends',],
            aes(rdplot_x, rdplot_y),
            color='red', size=0.5, inherit.aes = F) +
  
  geom_line(data=full_plot_dat[full_plot_dat$rdplot_x<0 & full_plot_dat$z == 'Flag Sales',],
          aes(rdplot_x, rdplot_y),
          color='red', size=0.5, inherit.aes = F) +
  geom_line(data=full_plot_dat[full_plot_dat$rdplot_x>0 & full_plot_dat$z == 'Flag Sales',],
            aes(rdplot_x, rdplot_y),
            color='red', size=0.5, inherit.aes = F)  
 floyd_plots 

ggsave(floyd_plots, 
       width=9, height=2,
       filename='rdit_floyd.pdf')

#####
# placebos
#####

dates <- c(ymd('2019-07-25'),
           ymd('2019-08-26'),
           ymd('2019-09-14'),
           ymd('2019-10-12'),
           ymd('2019-10-14'),
           ymd('2020-01-27'),
           ymd('2020-01-29'),
           ymd('2020-03-06'),
           ymd('2020-03-12'),
           ymd('2020-03-20'))

police_ambushed <- ymd(c('2014-12-20', # 2 NYPD killed in marked police car https://abc7news.com/nypd-cop-killed-officer-shot-bronx-shooting-new-york/2186010/
                         '2015-05-09', # MS police officers killed during traffic stop https://abc7news.com/nypd-cop-killed-officer-shot-bronx-shooting-new-york/2186010/
                         '2016-11-02', # scott greene iowa killed 2 cops in surprise attack https://www.desmoinesregister.com/story/news/crime-and-courts/2017/05/19/scott-greene-pleads-guilty-des-moines-area-police-officer-slayings/332469001/
                         '2017-07-05', # NYC police officer killed in Bronx https://abc7news.com/nypd-cop-killed-officer-shot-bronx-shooting-new-york/2186010/
                         '2020-09-12', # Los Angeles # https://apnews.com/article/shootings-carjacking-us-news-ap-top-news-los-angeles-c0eee5a37550850608a1ecd8ecb01811https://apnews.com/article/shootings-carjacking-us-news-ap-top-news-los-angeles-c0eee5a37550850608a1ecd8ecb01811
                         '2020-08-18')) # https://ktla.com/news/local-news/2-officers-shot-in-highland-a-day-after-san-bernardino-county-sheriffs-deputy-wounded-by-gunfire/

# twitter
dat <- read_csv('twitterdata/_tweets_by_day_aggregated.csv')

twitter <- data.frame(coef=rep(NA, length(dates)),
                   se=NA,
                   p=NA,
                   l=NA,
                   u=NA,
                   date=as.Date(NA))
for(i in 1:length(dates)){
  dat$day_running <- as.numeric(dates[i] - dat$date)*-1
  house_rdd <- rdd_data(y=dat$n, x=dat$day_running, cutpoint=0)
  bw_ik <- rdd_bw_ik(house_rdd)
  reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
  twitter[i,] <- data.frame(coef=reg_nonpara$coefficients,
                         se=reg_nonpara$coefMat[2],
                         p=reg_nonpara$coefMat[4],
                         l=reg_nonpara$coefficients - 1.96 * reg_nonpara$coefMat[2],
                         u=reg_nonpara$coefficients + 1.96 * reg_nonpara$coefMat[2],
                         date=ymd(dates[i]))
  print(i)
}
twitter  %>% arrange(date)

dat <- read_csv('twitterdata/new_full_tweets.csv')
twitter_ambush <- data.frame(coef=rep(NA, length(police_ambushed)),
                      se=NA,
                      p=NA,
                      l=NA,
                      u=NA,
                      date=as.Date(NA))

for(i in 1:length(police_ambushed)){
  dat$day_running <- as.numeric(police_ambushed[i] - dat$date)*-1
  house_rdd <- rdd_data(y=dat$n, x=dat$day_running, cutpoint=0)
  bw_ik <- rdd_bw_ik(house_rdd)
  reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
  twitter_ambush[i,] <- data.frame(coef=reg_nonpara$coefficients,
                            se=reg_nonpara$coefMat[2],
                            p=reg_nonpara$coefMat[4],
                            l=reg_nonpara$coefficients - 1.96 * reg_nonpara$coefMat[2],
                            u=reg_nonpara$coefficients + 1.96 * reg_nonpara$coefMat[2],
                            date=ymd(police_ambushed[i]))
  print(i)
}
twitter_ambush %>% arrange(date)

# anley placebos
dat <- read_csv( 'flagdata/4x6flag.csv')

anley <- data.frame(coef=rep(NA, length(dates)),
                      se=NA,
                      p=NA,
                      l=NA,
                      u=NA,
                      date=as.Date(NA))
for(i in 1:length(dates)){
  dat$day_running <- as.numeric(dates[i] - dat$date)*-1
  house_rdd <- rdd_data(y=dat$n, x=dat$day_running, cutpoint=0)
  bw_ik <- rdd_bw_ik(house_rdd)
  reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
  anley[i,] <- data.frame(coef=reg_nonpara$coefficients,
                            se=reg_nonpara$coefMat[2],
                            p=reg_nonpara$coefMat[4],
                            l=reg_nonpara$coefficients - 1.96 * reg_nonpara$coefMat[2],
                            u=reg_nonpara$coefficients + 1.96 * reg_nonpara$coefMat[2],
                            date=ymd(dates[i]))
  print(i)
}
anley %>% arrange(date)

anley_ambush <- data.frame(coef=rep(NA, length(police_ambushed)),
                    se=NA,
                    p=NA,
                    l=NA,
                    u=NA,
                    date=as.Date(NA))
for(i in 5:length(police_ambushed)){
  dat$day_running <- as.numeric(police_ambushed[i] - dat$date)*-1
  house_rdd <- rdd_data(y=dat$n, x=dat$day_running, cutpoint=0)
  bw_ik <- rdd_bw_ik(house_rdd)
  reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
  anley_ambush[i,] <- data.frame(coef=reg_nonpara$coefficients,
                          se=reg_nonpara$coefMat[2],
                          p=reg_nonpara$coefMat[4],
                          l=reg_nonpara$coefficients - 1.96 * reg_nonpara$coefMat[2],
                          u=reg_nonpara$coefficients + 1.96 * reg_nonpara$coefMat[2],
                          date=ymd(police_ambushed[i]))
  print(i)
}

anley_ambush %>% arrange(date) %>%
 drop_na()

####
# media placebos
####

dat <- read_csv('mediadata/bluelm_media_coverage.csv')
dat$date <- mdy(dat$date)

media <- data.frame(coef=rep(NA, length(dates)),
                    se=NA,
                    p=NA,
                    l=NA,
                    u=NA,
                    date=as.Date(NA))
for(i in 1:length(dates)){
  dat$day_running <- as.numeric(dates[i] - dat$date)*-1
  house_rdd <- rdd_data(y=dat$blm_count, x=dat$day_running, cutpoint=0)
  bw_ik <- rdd_bw_ik(house_rdd)
  reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
  media[i,] <- data.frame(coef=reg_nonpara$coefficients,
                          se=reg_nonpara$coefMat[2],
                          p=reg_nonpara$coefMat[4],
                          l=reg_nonpara$coefficients - 1.96 * reg_nonpara$coefMat[2],
                          u=reg_nonpara$coefficients + 1.96 * reg_nonpara$coefMat[2],
                          date=ymd(dates[i]))
  print(i)
}
media %>% arrange(date)

dat <- read_csv('mediadata/media2.csv') # longer time span
dat$date <- mdy(dat$date)

media_ambush <- data.frame(coef=rep(NA, length(police_ambushed)),
                    se=NA,
                    p=NA,
                    l=NA,
                    u=NA,
                    date=as.Date(NA))
for(i in 1:length(police_ambushed)){
  dat$day_running <- as.numeric(police_ambushed[i] - dat$date)*-1
  house_rdd <- rdd_data(y=dat$bluelivesmatter, x=dat$day_running, cutpoint=0)
  bw_ik <- rdd_bw_ik(house_rdd)
  reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
  media_ambush[i,] <- data.frame(coef=reg_nonpara$coefficients,
                          se=reg_nonpara$coefMat[2],
                          p=reg_nonpara$coefMat[4],
                          l=reg_nonpara$coefficients - 1.96 * reg_nonpara$coefMat[2],
                          u=reg_nonpara$coefficients + 1.96 * reg_nonpara$coefMat[2],
                          date=ymd(police_ambushed[i]))
  print(i)
}
media_ambush %>% arrange(date)

###
# google trends 
###

dat <- read_csv('googledata/scaled_google_search.csv')
dat$date <- ymd(dat$date)

trendz <- data.frame(coef=rep(NA, length(dates)),
                     se=NA,
                     p=NA,
                     l=NA,
                     u=NA,
                     date=as.Date(NA))
for(i in 1:length(dates)){
  dat$day_running <- as.numeric(dates[i] - dat$date)*-1
  house_rdd <- rdd_data(y=dat$hits, x=dat$day_running, cutpoint=0)
  bw_ik <- rdd_bw_ik(house_rdd)
  reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
  trendz[i,] <- data.frame(coef=reg_nonpara$coefficients,
                           se=reg_nonpara$coefMat[2],
                           p=reg_nonpara$coefMat[4],
                           l=reg_nonpara$coefficients - 1.96 * reg_nonpara$coefMat[2],
                           u=reg_nonpara$coefficients + 1.96 * reg_nonpara$coefMat[2],
                           date=ymd(dates[i]))
  print(i)
}
trendz %>% arrange(date) %>% mutate_at(1:5, function(x) round(x, 2))

police_ambushed <- ymd(c('2014-12-20', # 2 NYPD killed in marked police car https://abc7news.com/nypd-cop-killed-officer-shot-bronx-shooting-new-york/2186010/
                         '2015-05-09', # MS police officers killed during traffic stop https://abc7news.com/nypd-cop-killed-officer-shot-bronx-shooting-new-york/2186010/
                         '2016-11-02', # scott greene iowa killed 2 cops in surprise attack https://www.desmoinesregister.com/story/news/crime-and-courts/2017/05/19/scott-greene-pleads-guilty-des-moines-area-police-officer-slayings/332469001/
                         '2017-07-05', # NYC police officer killed in Bronx https://abc7news.com/nypd-cop-killed-officer-shot-bronx-shooting-new-york/2186010/
                         '2020-09-12', # Los Angeles # https://apnews.com/article/shootings-carjacking-us-news-ap-top-news-los-angeles-c0eee5a37550850608a1ecd8ecb01811https://apnews.com/article/shootings-carjacking-us-news-ap-top-news-los-angeles-c0eee5a37550850608a1ecd8ecb01811
                         '2020-08-18')) # https://ktla.com/news/local-news/2-officers-shot-in-highland-a-day-after-san-bernardino-county-sheriffs-deputy-wounded-by-gunfire/

trendz_ambush <- data.frame(coef=rep(NA, length(police_ambushed)),
                            se=NA,
                            p=NA,
                            l=NA,
                            u=NA,
                            date=as.Date(NA))
for(i in 1:length(police_ambushed)){
  dat$day_running <- as.numeric(police_ambushed[i] - dat$date)*-1
  house_rdd <- rdd_data(y=dat$hits, x=dat$day_running, cutpoint=0)
  bw_ik <- rdd_bw_ik(house_rdd)
  reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
  trendz_ambush[i,] <- data.frame(coef=reg_nonpara$coefficients,
                                  se=reg_nonpara$coefMat[2],
                                  p=reg_nonpara$coefMat[4],
                                  l=reg_nonpara$coefficients - 1.96 * reg_nonpara$coefMat[2],
                                  u=reg_nonpara$coefficients + 1.96 * reg_nonpara$coefMat[2],
                                  date=ymd(police_ambushed[i]))
  print(i)
}

trendz_ambush %>% arrange(date)

###
# State Level Comparisons
###

dat_comb <- bind_rows(
  read_csv('googledata/ny_blue_lives_matter.csv') %>% dplyr::mutate(state = 'New York'),
  read_csv('googledata/ca_blue_lives_matter.csv') %>% dplyr::mutate(state = 'California')
) %>%
  dplyr::select(-1)

runRDIT <- function(date, dat, state){
  cut = lubridate::ymd(date) 
  dat_mini <- dat %>% arrange(date)
  dat_mini$running_day <- as.numeric(dat_mini$date - cut)
  dat_mini$hits <- as.numeric(dat_mini$hits)
  
  house_rdd <- rdd_data(y=as.numeric(dat_mini$hits), x=dat_mini$running_day, cutpoint=0)
  bw_ik <- rdd_bw_ik(house_rdd)
  reg_nonpara <- rdd_reg_np(rdd_object=house_rdd, bw=bw_ik)
  rddtoolsest <- data.frame(Est = 'Rdd Tools Coef',
                            Coef=reg_nonpara$coefficients, #/sd_out ,
                            lower=(reg_nonpara$coefficients - 1.96 * reg_nonpara$coefMat[2]),#/sd_out,
                            upper=(reg_nonpara$coefficients + 1.96 * reg_nonpara$coefMat[2]),
                            state=state,
                            date=date)#/sd_out)
  return(rddtoolsest)
}

bind_rows(runRDIT('2014-12-20', dat_comb[dat_comb$state=='New York',],'New York'),
          runRDIT('2017-07-05', dat_comb[dat_comb$state=='New York',],'New York'), 
          runRDIT('2020-08-18', dat_comb[dat_comb$state=='California',],'California'),
          runRDIT('2020-09-12', dat_comb[dat_comb$state=='California',],'California'))

bind_rows(runRDIT('2020-05-28', dat_comb[dat_comb$state=='New York',],'New York'),
          runRDIT('2020-05-28', dat_comb[dat_comb$state=='California',],'California'))


# tweets
ca <- read_csv('twitterdata/all_tweet_ca.csv')
ny <- read_csv('twitterdata/all_tweet_ny.csv')
ia <- read_csv('twitterdata/all_tweet_ia.csv')
ms <- read_csv('twitterdata/all_tweet_ms.csv')

fulldates <- data.frame(date=ymd(seq(min(ca$date), max(ca$date), by='day')))
ca <- left_join(fulldates, ca, by='date') %>%
  mutate(hits = case_when(
    is.na(hits) ~ 0,
    TRUE ~ hits
  ))
ny <- left_join(fulldates, ny, by='date') %>%
  mutate(hits = case_when(
    is.na(hits) ~ 0,
    TRUE ~ hits
  ))
ia <- left_join(fulldates, ia, by='date') %>%
  mutate(hits = case_when(
    is.na(hits) ~ 0,
    TRUE ~ hits
  ))
ms <- left_join(fulldates, ms, by='date') %>%
  mutate(hits = case_when(
    is.na(hits) ~ 0,
    TRUE ~ hits
  ))

bind_rows(runRDIT('2020-08-18', ca, 'California'),
          runRDIT('2020-09-12', ca, 'California'),
          runRDIT('2017-07-05', ny, 'New York'),
          runRDIT('2014-12-20', ny, 'New York'),
          runRDIT('2016-11-02', ia, 'Iowa'),
          runRDIT('2015-05-09', ms, 'Mississippi'))

bind_rows(runRDIT('2020-05-28', ca, 'California'),
          runRDIT('2020-05-28', ny, 'New York'),
          runRDIT('2020-05-28', ia, 'Iowa'),
          runRDIT('2020-05-28', ms, 'Mississippi'))

# flag sales
ca <- read_csv('flagdata/ca_flags.csv')

bind_rows(runRDIT('2020-08-18', ca, "California"),
          runRDIT('2020-09-12', ca, "California"))

runRDIT('2020-05-28', ca, "California")

# Media coverage
ca <- read_csv('mediadata/ca_media.csv')
ny <- read_csv('mediadata/ny_media.csv')

bind_rows(runRDIT('2020-08-18', ca, "California"),
          runRDIT('2020-09-12', ca, "California"),
          runRDIT('2017-07-05', ny, "New York"),
          runRDIT('2014-12-20', ny, "New York"))
bind_rows(runRDIT('2020-05-28', ca, "California"),
          runRDIT('2020-05-28', ny, "New York"))

# Make Figure B.1
df <- read_csv('googledata/scaled_google_search.csv')
plot <- df %>%
  pivot_longer(c('daily', 'monthly','hits')) %>%
  mutate(name = case_when(
    name == 'daily' ~ "Daily",
    name == 'monthly' ~ "Monthly",
    name == 'hits' ~ 'Scaled Value'
  )) %>%
  ggplot(aes(date, value)) +
  geom_line() + 
  facet_wrap(~name, ncol=1, scales='free_y') +
  labs(y="Google Searches\n 'Blue Lives Matter'", x='') +
  theme_bw()
plot



